Amiga Format CD 52

home *** CD-ROM | disk | FTP | other *** search

/ Amiga Format CD 52 / Amiga Format AFCD52 (Issue 136, May 2000).iso / -serious- / programming / other / jikes-1.11 / src / stream.h < prev next >

Wrap

C/C++ Source or Header | 2000-02-23 | 12KB | 409 lines

// $Id: stream.h,v 1.15 2000/01/07 00:25:53 lord Exp $ // // This software is subject to the terms of the IBM Jikes Compiler // License Agreement available at the following URL: // http://www.ibm.com/research/jikes. // Copyright (C) 1996, 1998, International Business Machines Corporation // and others. All Rights Reserved. // You must accept the terms of that agreement to use this software. // #ifndef stream_INCLUDED #define stream_INCLUDED #include "config.h" #include <sys/stat.h> #include <limits.h> #include <iostream.h> #include <stddef.h> #include <stdio.h> #include "javadef.h" #include "javasym.h" #include "tuple.h" #include "tab.h" #include "lookup.h" class Control; class Input_info; class Scanner; class Symbol; class FileSymbol; class ZipFile; class StreamError { public: enum StreamErrorKind { BAD_TOKEN, BAD_OCTAL_CONSTANT, EMPTY_CHARACTER_CONSTANT, UNTERMINATED_CHARACTER_CONSTANT, UNTERMINATED_COMMENT, UNTERMINATED_STRING_CONSTANT, INVALID_HEX_CONSTANT, INVALID_FLOATING_CONSTANT_EXPONENT, INVALID_UNICODE_ESCAPE }; unsigned start_location, end_location; StreamErrorKind kind; void Initialize(StreamErrorKind kind_, unsigned start_location_, unsigned end_location_) { kind = kind_; start_location = start_location_; end_location = end_location_; return; } }; // // LexStream holds a stream of tokens generated from an input and // provides methods to retrieve information from the stream. // class LexStream { public: typedef int TypeIndex; typedef int TokenIndex; typedef int CommentIndex; enum { LEX_INFINITY = INT_MAX }; // the largest possible value for TokenIndex FileSymbol *file_symbol; inline TokenIndex Next(TokenIndex i) { return (++i < token_stream.Length() ? i : token_stream.Length() - 1); } inline TokenIndex Previous(TokenIndex i) { return (i <= 0 ? 0 : i - 1); } inline TokenIndex Peek() { return Next(index); } inline void Reset(TokenIndex i = 1) { index = Previous(i); } inline TokenIndex Gettoken() { return index = Next(index); } inline TokenIndex Gettoken(TokenIndex end_token) { return index = (index < end_token ? Next(index) : token_stream.Length() - 1); } inline TokenIndex Badtoken() { return 0; } inline unsigned Kind(TokenIndex i) { return tokens[i].Kind(); } inline unsigned Location(TokenIndex i) { return tokens[i].Location(); } inline unsigned Line(TokenIndex i) { return FindLine(tokens[i].Location()); } inline unsigned Column(TokenIndex i) { return columns ? columns[i] : (input_buffer ? FindColumn(tokens[i].Location()) : 0); } inline bool AfterEol(TokenIndex i) { return (i < 1 ? true : Line(i - 1) < Line(i)); } inline bool IsDeprecated(TokenIndex i) { return tokens[i].Deprecated(); } inline TokenIndex MatchingBrace(TokenIndex i) { return tokens[i].additional_info.right_brace; } wchar_t *NameString(TokenIndex i) { return (NameSymbol(i) || LiteralSymbol(i) ? tokens[i].additional_info.symbol -> Name() : KeywordName(tokens[i].Kind())); } int NameStringLength(TokenIndex i) { return (NameSymbol(i) || LiteralSymbol(i) ? tokens[i].additional_info.symbol -> NameLength() : wcslen(KeywordName(tokens[i].Kind()))); } class LiteralSymbol *LiteralSymbol(TokenIndex); class NameSymbol *NameSymbol(TokenIndex); char *FileName(); size_t FileNameLength(); inline int LineLength(unsigned line_no) { return locations[line_no + 1] - locations[line_no]; } inline int LineStart(unsigned line_no) { return locations[line_no]; } inline int LineEnd(unsigned line_no) { return locations[line_no + 1] - 1; } inline int LineSegmentLength(TokenIndex i) { return Tab::Wcslen(input_buffer, tokens[i].Location(), LineEnd(Line(i))); } // // For a sequence of tokens in a given range find out how many large // characters they contain and compute the appropriate offset. // inline int WcharOffset(TokenIndex start, TokenIndex end) { int offset = 0; for (TokenIndex i = start; i <= end; i++) { for (wchar_t *str = NameString(i); *str; str++) { if (*str > 0xff) offset += 5; } } return offset; } // // When only an end token is supplied, the start token is assume to be the first one on the same line. // inline int WcharOffset(TokenIndex end) { TokenIndex start; unsigned the_line = Line(end); for (start = end; Line(start) == the_line; start--) ; start++; return WcharOffset(start, end); } wchar_t *InputBuffer() { return input_buffer; } size_t InputBufferLength() { return input_buffer_length; } CommentIndex FirstComment(TokenIndex); inline int NumTypes() { return type_index.Length(); } inline TokenIndex Type(int i) { return types[i]; } inline int NumTokens() { return token_stream.Length(); } inline int NumComments() { return comment_stream.Length(); } inline TokenIndex PrecedingToken(CommentIndex i) { return comments[i].previous_token; } inline unsigned CommentLocation(CommentIndex i) { return comments[i].location; } inline wchar_t *CommentString(CommentIndex i) { return comments[i].string; } inline int CommentStringLength(CommentIndex i) { return comments[i].length; } inline int NumBadTokens() { return bad_tokens.Length(); } #ifdef TEST int file_read; #endif //* //* Constructors and Destructor. //* LexStream(Control &control_, FileSymbol *file_symbol_) : file_symbol(file_symbol_), #ifdef TEST file_read(0), #endif tokens(NULL), columns(NULL), token_stream(12, 16), comments(NULL), comment_stream(10, 8), locations(NULL), line_location(12, 8), initial_reading_of_input(true), input_buffer(NULL), input_buffer_length(0), comment_buffer(NULL), control(control_) {} bool ComputeColumns() { RereadInput(); if (input_buffer) InitializeColumns(); DestroyInput(); return (columns != NULL); } void RereadInput(); ~LexStream(); void DestroyInput() { delete [] input_buffer; input_buffer = NULL; delete [] comment_buffer; comment_buffer = NULL; } void SortMessages(); void PrintMessages(); void PrintEmacsMessage(int); void PrintSmallSource(int); void PrintLargeSource(int); void PrintMessage(StreamError::StreamErrorKind); void SetUpComments() { RereadInput(); // // Calculate the length of the string required to save the comments. // Allocate the buffer, save the comments in the buffer and update their // respective "string" pointer. // int length = 0, i; for (i = 1; i < comment_stream.Length(); i++) length += (comments[i].length + 1); comment_buffer = new wchar_t[length]; wchar_t *ptr = comment_buffer; for (i = 1; i < comment_stream.Length(); i++) { memmove(ptr, &(input_buffer[comments[i].location]), comments[i].length * sizeof(wchar_t)); comments[i].string = ptr; ptr += comments[i].length; *ptr++ = U_NULL; } return; } #ifdef TEST void LexStream::Dump(); // temporary function used to dump token stream. #endif // // Return the total size of space allocated for the tokens. // size_t TokenSpaceAllocated(void) { return token_stream.Length() * sizeof(Token); } // // Return the total size of space allocated for the comments. // size_t CommentSpaceAllocated(void) { return comment_stream.Length() * sizeof(Comment); } private: int hexvalue(wchar_t ch); enum UnicodeLexerState { RAW, CR, QUOTE, UNICODE_ESCAPE, UNICODE_ESCAPE_DIGIT_0, UNICODE_ESCAPE_DIGIT_1, UNICODE_ESCAPE_DIGIT_2 }; friend class Scanner; class Token { // // It is expected that a location will be set for every token. Therefore, // as we are setting the location, we also reset the deprecated bit to 0. // If it is subsequently discovered that the token is followed by one or more // deprecated tags then the bit is set to 1 by an invocation of the // function SetDeprecated. Note that a better way to resetting all the bits in // "info" is to use the function ResetInfoAndSetLocation defined below, instead // of using SetLocation // inline void SetLocation(unsigned location) { assert(location <= 0x00FFFFFF); info = (info & 0x0000007F) | (location << 8); } public: unsigned info; union { Symbol *symbol; TokenIndex right_brace; } additional_info; // // To just reset the info, this function should be invoked with a location value of 0. // inline void ResetInfoAndSetLocation(unsigned location) { assert(location <= 0x00FFFFFF); info = (location << 8); additional_info.symbol = NULL; } inline unsigned Location() { return (info >> 8); } inline void SetKind(unsigned kind) { assert(kind <= 0x0000007F); info = (info & 0xFFFFFF80) | kind; } inline unsigned Kind() { return (info & 0x0000007F); } inline void SetDeprecated() { info |= 0x00000080; } inline bool Deprecated() { return ((info & 0x00000080) != 0); } inline void SetSymbol(Symbol *symbol) { additional_info.symbol = symbol; } inline void SetRightBrace(TokenIndex rbrace) { additional_info.right_brace = rbrace; } }; TokenIndex GetNextToken(unsigned location = 0) { TokenIndex index = token_stream.NextIndex(); token_stream[index].ResetInfoAndSetLocation(location); return index; } class Comment { public: TokenIndex previous_token; unsigned location; unsigned length; wchar_t *string; }; Tuple<StreamError> bad_tokens; TokenIndex index; Token *tokens; unsigned short *columns; ConvertibleArray<Token> token_stream; Comment *comments; ConvertibleArray<Comment> comment_stream; unsigned *locations; ConvertibleArray<unsigned> line_location; TokenIndex *types; ConvertibleArray<TokenIndex> type_index; void InitializeColumns(); void CompressSpace(); bool initial_reading_of_input; wchar_t *input_buffer; size_t input_buffer_length; wchar_t *comment_buffer; Control &control; void ReadInput(); void ProcessInput(char *, long); void ProcessInputAscii(char *, long); #ifdef HAVE_LIB_ICU_UC void ProcessInputUnicode(char *, long); #endif wchar_t *KeywordName(int); unsigned FindLine(unsigned location); unsigned FindColumn(unsigned location) { assert(locations); return Tab::Wcslen(input_buffer, locations[FindLine(location)], location); } }; #endif